import urllib.request as req,urllib.parse as parse
from bs4 import BeautifulSoup
import xlwt
# Minimal xlwt demo: create a workbook with one sheet, write a single cell,
# and save the result to the current directory.
workboot = xlwt.Workbook(encoding="utf-8")  # create the workbook object
worksheet = workboot.add_sheet("sheet1")  # add a sheet named "sheet1"
worksheet.write(0, 0, "hello")  # arguments: row, column, value
# xlwt only produces the legacy binary .xls format, so the file is saved with
# the matching extension; an .xlsx name would mislabel the contents.
workboot.save("./hello.xls")

import re   #正则表达式，文字匹配



# Parse the local file ./douban.html and print every tag whose name matches
# the regular expression below.
# The context manager closes the file handle as soon as the bytes are read.
with open("./douban.html", "rb") as file:
    html = file.read()
bs = BeautifulSoup(html, "html.parser")
# print(bs.title)
# print(bs.a.attrs['href'])
# print(bs.head.contents)
# NOTE: a compiled pattern passed to find_all() is applied to tag names with
# search(), so "a" matches any tag whose name contains the letter "a"
# (not only <a> tags).
print(bs.find_all(re.compile("a")))





# if __name__ =="__main__":
#     resp=req.urlopen("http://www.baidu.com")
#     print(resp.read().decode("utf-8"))
# data=parse.urlencode({"hello":"world"}).encode("utf-8")
# resp=req.urlopen("http://www.httpbin.org/post",data)
# print(resp.read().decode("utf-8"))

# data=parse.urlencode({"hello":"world"}).encode("utf-8")
# # # Build the request object
# obj=req.Request(url="https://www.douban.com",data=data,method="POST",headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36"})
# resp=req.urlopen(obj)
# print(resp.read().decode("utf-8"))
# print(resp.status)
# print(obj.headers)